Online-Academy
Look, Read, Understand, Apply

Data Mining And Data Warehousing

Data warehouse - Example - i

Star Schema Overview (Sales Data)

Star schema has:
One central fact table :- sales_fact
Multiple dimension tables :- time_dim, location_dim, product_dim

Time - Dimension
CREATE TABLE time_dim (
    time_id INT PRIMARY KEY,
    date DATE,
    day INT,
    month INT,
    month_name VARCHAR(20),
    quarter INT,
    year INT
);

Location - Dimension
CREATE TABLE location_dim (
    location_id INT PRIMARY KEY,
    city VARCHAR(50),
    state VARCHAR(50),
    country VARCHAR(50),
    region VARCHAR(50)
);

 Product - Dimension
CREATE TABLE product_dim (
    product_id INT PRIMARY KEY,
    product_name VARCHAR(100),
    category VARCHAR(50),
    brand VARCHAR(50)
);

Fact table - sales_fact
CREATE TABLE sales_fact (
    sales_id INT PRIMARY KEY,
    time_id INT,
    location_id INT,
    product_id INT,
    quantity_sold INT,
    sales_amount DECIMAL(10,2),

    FOREIGN KEY (time_id) REFERENCES time_dim(time_id),
    FOREIGN KEY (location_id) REFERENCES location_dim(location_id),
    FOREIGN KEY (product_id) REFERENCES product_dim(product_id)
);

Insert Records to time_dim table
INSERT INTO time_dim VALUES
(1, '2025-01-15', 15, 1, 'January', 1, 2025),
(2, '2025-02-10', 10, 2, 'February', 1, 2025);

Insert Records to location_dim table
INSERT INTO location_dim VALUES
(1, 'Kathmandu', 'Bagmati', 'Nepal', 'Central'),
(2, 'Pokhara', 'Gandaki', 'Nepal', 'Western');

Insert Records to product_dim table
INSERT INTO product_dim VALUES
(1, 'Face Cream', 'Skincare', 'GlowCare'),
(2, 'Sunscreen', 'Skincare', 'SunSafe');

Insert Records to sales_fact table
INSERT INTO sales_fact VALUES
(1, 1, 1, 1, 10, 5000.00),
(2, 2, 2, 2, 5, 3000.00);

Roll-Up (Summarization): Total sales by year
SELECT 
    t.year,
    SUM(s.sales_amount) AS total_sales
FROM sales_fact s
JOIN time_dim t ON s.time_id = t.time_id
GROUP BY t.year;

Total sales by product category
SELECT 
    p.category,
    SUM(s.sales_amount) AS total_sales
FROM sales_fact s
JOIN product_dim p ON s.product_id = p.product_id
GROUP BY p.category;

Drill-Down (More detailed view): Sales by month for each year
SELECT 
    t.year,
    t.month_name,
    SUM(s.sales_amount) AS total_sales
FROM sales_fact s
JOIN time_dim t ON s.time_id = t.time_id
GROUP BY t.year, t.month, t.month_name
ORDER BY t.year, t.month;

Slice (Fix one dimension): Sales for the year 2025 only
SELECT 
    p.product_name,
    SUM(s.sales_amount) AS total_sales
FROM sales_fact s
JOIN time_dim t ON s.time_id = t.time_id
JOIN product_dim p ON s.product_id = p.product_id
WHERE t.year = 2025
GROUP BY p.product_name;

Dice (Multiple conditions): Sales for Skincare products in Kathmandu
SELECT 
    t.month_name,
    SUM(s.sales_amount) AS total_sales
FROM sales_fact s
JOIN time_dim t ON s.time_id = t.time_id
JOIN product_dim p ON s.product_id = p.product_id
JOIN location_dim l ON s.location_id = l.location_id
WHERE p.category = 'Skincare'
  AND l.city = 'Kathmandu'
GROUP BY t.month_name;

Pivot (Cross-tab / multidimensional view): Sales by product across cities
SELECT 
    p.product_name,
    SUM(CASE WHEN l.city = 'Kathmandu' THEN s.sales_amount ELSE 0 END) AS Kathmandu_Sales,
    SUM(CASE WHEN l.city = 'Pokhara' THEN s.sales_amount ELSE 0 END) AS Pokhara_Sales
FROM sales_fact s
JOIN product_dim p ON s.product_id = p.product_id
JOIN location_dim l ON s.location_id = l.location_id
GROUP BY p.product_name;

Ranking (Top-N analysis): Top 3 products by sales
SELECT 
    p.product_name,
    SUM(s.sales_amount) AS total_sales
FROM sales_fact s
JOIN product_dim p ON s.product_id = p.product_id
GROUP BY p.product_name
ORDER BY total_sales DESC
FETCH FIRST 3 ROWS ONLY;

OLAP (Advanced): Running total of sales by month
SELECT 
    t.year,
    t.month_name,
    SUM(s.sales_amount) AS monthly_sales,
    SUM(SUM(s.sales_amount)) OVER (ORDER BY t.year, t.month) AS running_total
FROM sales_fact s
JOIN time_dim t ON s.time_id = t.time_id
GROUP BY t.year, t.month, t.month_name;